package com.gitee.webmagic.processor;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

import java.util.List;

/**
 * WebMagic {@link PageProcessor} that extracts book-listing fields from a page
 * and prints each extracted list to standard output.
 *
 * <p>Every field is selected beneath {@code div[@id='subject_list']/ul/li}: detail URL,
 * cover image, book name, rating, comment-count text, summary and the publication-info
 * string. This processor only prints the values; it does not store them on the page and
 * does not queue any follow-up requests.
 *
 * <p>Example launch:
 * <pre>{@code
 * Spider.create(new BlogPageProcessor())
 *         .setDownloader(new HttpClientDownloader())
 *         .addUrl("https://book.douban.com/tag/%E7%A7%91%E5%AD%A6?type=R")
 *         .thread(50)
 *         .run();
 * }</pre>
 *
 * @author wangqiang
 * @version 1.0
 * @date 2021/3/24 21:39
 */
public class BlogPageProcessor implements PageProcessor {

    /** XPath of one list entry; every field below is selected relative to it. */
    private static final String ITEM = "div[@id='subject_list']/ul/li";

    /** XPath of the anchor wrapping the cover picture of an entry. */
    private static final String PIC_LINK = ITEM + "/div[@class='pic']/a";

    /** XPath of the textual info container of an entry. */
    private static final String INFO = ITEM + "/div[@class='info']";

    /** XPath of the rating row inside the info container. */
    private static final String STAR = INFO + "/div[@class='star clearfix']";

    // NOTE(review): 100000 is passed to setSleepTime; if the unit is milliseconds this is
    // 100 s between requests -- confirm that such a long pause is intended.
    private final Site site = Site.me().setRetryTimes(10).setSleepTime(100000);

    /**
     * Extracts each listing field from {@code page} and prints it, one line per field,
     * as the field label immediately followed by the list of extracted values.
     *
     * @param page the downloaded page to extract from
     */
    @Override
    public void process(Page page) {
        // Detail page URLs
        printAll(page, "detailUrls", PIC_LINK + "/@href");
        // Cover images
        printAll(page, "imgs", PIC_LINK + "/img/@src");
        // Book names
        printAll(page, "bookNames", INFO + "/h2/a/text()");
        // Ratings
        printAll(page, "points", STAR + "/span[@class='rating_nums']/text()");
        // Comment-count text
        printAll(page, "commentCount", STAR + "/span[@class='pl']/text()");
        // Summaries
        printAll(page, "summarys", INFO + "/p/text()");
        // Combined string: author, publisher, publication date, price
        printAll(page, "pubInfo", INFO + "/div[@class='pub']/text()");
    }

    /**
     * Returns the crawl settings used by this processor.
     *
     * @return the {@link Site} instance configured in this class
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Selects every match of {@code xpath} in the page's HTML and prints
     * {@code label} directly followed by the resulting list.
     *
     * @param page  the page whose HTML is queried
     * @param label text printed in front of the extracted values
     * @param xpath XPath expression to evaluate
     */
    private static void printAll(Page page, String label, String xpath) {
        List<String> values = page.getHtml().xpath(xpath).all();
        System.out.println(label + values);
    }
}
